library(sem)

# Set number of simulated datasets
n <- 1000

# Set total number of variance parameters to examine
largest <- 14



######## Linear CRF

# Generating matrices to hold the results
FS <- FSK <- RF <- IV1 <- IV2 <- matrix(nrow=largest,ncol=4)
poverp <- matrix(nrow=largest,ncol=2)

# Set seed
set.seed(12345)

# Loop over the variance parameter sigma^2 (=j/6)
for (j in 1:largest){
	# Set variance
	par <- j/6

	# Create temporary holding matrices
	fs <- fsk <- ols <- iv1 <- iv2 <- matrix(nrow=n,ncol=2)

	# First stage (linear), first stage (coarsened), reduced form, and IV regressions  for each of the 1,000 datasets
	for (i in 1:n){
		z <- sample(rep(c(0,1),n/2))
		x <- round(z + rnorm(n,10,par))
		y <- 0.05*as.numeric(x>=1) + 0.05*as.numeric(x>=2) + 0.05*as.numeric(x>=3) + 
		  0.05*as.numeric(x>=4) + 0.05*as.numeric(x>=5) + 0.05*as.numeric(x>=6) + 0.05*as.numeric(x>=7) + 
		  0.05*as.numeric(x>=8) + 0.05*as.numeric(x>=9) + 0.05*as.numeric(x>=10) + 0.05*as.numeric(x>=11) + 
		  0.05*as.numeric(x>=12) + 0.05*as.numeric(x>=13) + 0.05*as.numeric(x>=14) + 0.05*as.numeric(x>=15) + 
		  0.05*as.numeric(x>=16) + 0.05*as.numeric(x>=17) + 0.05*as.numeric(x>=18) + rnorm(n,0.4,.2)
		d <- as.numeric(x>=11)

		fs[i,] <- summary(lm(x ~ z))$coefficients[2,1:2]
		fsk[i,] <- summary(lm(d ~ z))$coefficients[2,1:2]
		ols[i,] <- summary(lm(y ~ z))$coefficients[2,1:2]
		iv1[i,] <- summary(tsls(y ~ d, ~ z))$coefficients[2,1:2]
		iv2[i,] <- summary(tsls(y ~ x, ~ z))$coefficients[2,1:2]
	}

	# First stage ratio shown on the x axis
	poverp <- ( mean(fs[,1]) - mean(fsk[,1]) ) / mean(fsk[,1])

	FS[j,] <- cbind( par, poverp, mean(fs[,1]), mean(fs[,2])+(1+1/n)*var(fs[,1]) )
	FSK[j,] <- cbind( par, poverp, mean(fsk[,1]), mean(fsk[,2])+(1+1/n)*var(fsk[,1]) )
	RF[j,] <- cbind( par, poverp, mean(ols[,1]), mean(ols[,2])+(1+1/n)*var(ols[,1]) )
	IV1[j,] <- cbind( par, poverp, mean(iv1[,1]), mean(iv1[,2])+(1+1/n)*var(iv1[,1]) )
	IV2[j,] <- cbind( par, poverp, mean(iv2[,1]), mean(iv2[,2])+(1+1/n)*var(iv2[,1]) )
}

FS; FSK; RF; IV1; IV2





######## DISCONTINUOUS CRF

FS <- FSK <- RF <- IV1 <- IV2 <- matrix(nrow=largest,ncol=4)

set.seed(12345)


for (j in 1:largest){
	par <- j/6

	fs <- fsk <- ols <- iv1 <- iv2 <- matrix(nrow=n,ncol=2)

	for (i in 1:n){
		z <- rbinom(n,1,.5)
		x <- round(z + rnorm(n,10,par))
		y <- 0.05*as.numeric(x>=11) + rnorm(n,0.4,.2)
		d <- as.numeric(x>=11)

		fs[i,] <- summary(lm(x ~ z))$coefficients[2,1:2]
		fsk[i,] <- summary(lm(d ~ z))$coefficients[2,1:2]
		ols[i,] <- summary(lm(y ~ z))$coefficients[2,1:2]
		iv1[i,] <- summary(tsls(y ~ d, ~ z))$coefficients[2,1:2]
		iv2[i,] <- summary(tsls(y ~ x, ~ z))$coefficients[2,1:2]
	}

	poverp <- ( mean(fs[,1]) - mean(fsk[,1]) )/mean(fsk[,1])

	FS[j,] <- cbind( par, poverp, mean(fs[,1]), mean(fs[,2])+(1+1/n)*var(fs[,1]) )
	FSK[j,] <- cbind( par, poverp, mean(fsk[,1]), mean(fsk[,2])+(1+1/n)*var(fsk[,1]) )
	RF[j,] <- cbind( par, poverp, mean(ols[,1]), mean(ols[,2])+(1+1/n)*var(ols[,1]) )
	IV1[j,] <- cbind( par, poverp, mean(iv1[,1]), mean(iv1[,2])+(1+1/n)*var(iv1[,1]) )
	IV2[j,] <- cbind( par, poverp, mean(iv2[,1]), mean(iv2[,2])+(1+1/n)*var(iv2[,1]) )
}

FS; FSK; RF; IV1; IV2







######## INCORRECTLY IDENTIFIED DISCONTINUOUS CRF

FS <- FSK <- RF <- IV1 <- IV2 <- matrix(nrow=largest,ncol=4)

set.seed(12345)

for (j in 3:20){
	par <- j/12

	fs <- fsk <- ols <- iv1 <- iv2 <- matrix(nrow=n,ncol=2)

	for (i in 1:n){
		z <- rbinom(n,1,.5)
		x <- round(z + rnorm(n,10,par))
		y <- 0.05*as.numeric(x>=11) + rnorm(n,0.4,.2)
		d <- as.numeric(x>=12)

		fs[i,] <- summary(lm(x ~ z))$coefficients[2,1:2]
		fsk[i,] <- summary(lm(d ~ z))$coefficients[2,1:2]
		ols[i,] <- summary(lm(y ~ z))$coefficients[2,1:2]
		iv1[i,] <- summary(tsls(y ~ d, ~ z))$coefficients[2,1:2]
		iv2[i,] <- summary(tsls(y ~ x, ~ z))$coefficients[2,1:2]
	}

	poverp <- ( mean(fs[,1]) - mean(fsk[,1]) )/mean(fsk[,1])

	FS[j,] <- cbind( par, poverp, mean(fs[,1]), mean(fs[,2])+(1+1/n)*var(fs[,1]) )
	FSK[j,] <- cbind( par, poverp, mean(fsk[,1]), mean(fsk[,2])+(1+1/n)*var(fsk[,1]) )
	RF[j,] <- cbind( par, poverp, mean(ols[,1]), mean(ols[,2])+(1+1/n)*var(ols[,1]) )
	IV1[j,] <- cbind( par, poverp, mean(iv1[,1]), mean(iv1[,2])+(1+1/n)*var(iv1[,1]) )
	IV2[j,] <- cbind( par, poverp, mean(iv2[,1]), mean(iv2[,2])+(1+1/n)*var(iv2[,1]) )
}

FS; FSK; RF; IV1; IV2









######## Non-linear CRFs


FS <- FSK <- RF <- IV1 <- IV2 <- matrix(nrow=10,ncol=4)
poverp <- matrix(nrow=largest,ncol=2)

set.seed(12345)



for (j in 1:10){

f <- function(x) { x^(j/10)/20 - (x-1)^(j/10)/20 }

fs <- fsk <- ols <- iv1 <- iv2 <- matrix(nrow=n,ncol=2)

for (i in 1:n){

z <- rbinom(n,1,.5)
x <- round(z + rnorm(n,10,1))
y <- 0.05*as.numeric(x>=1) + f(2)*as.numeric(x>=2) + f(3)*as.numeric(x>=3) + 
  f(4)*as.numeric(x>=4) + f(5)*as.numeric(x>=5) + f(6)*as.numeric(x>=6) + f(7)*as.numeric(x>=7) + 
  f(8)*as.numeric(x>=8) + f(9)*as.numeric(x>=9) + f(10)*as.numeric(x>=10) + f(11)*as.numeric(x>=11) + 
  f(12)*as.numeric(x>=12) + f(13)*as.numeric(x>=13) + f(14)*as.numeric(x>=14) + f(15)*as.numeric(x>=15) + 
  f(16)*as.numeric(x>=16) + f(17)*as.numeric(x>=17) + f(18)*as.numeric(x>=18) + rnorm(n,0.4,.2)
d <- as.numeric(x>=11)

fs[i,] <- summary(lm(x ~ z))$coefficients[2,1:2]
fsk[i,] <- summary(lm(d ~ z))$coefficients[2,1:2]
ols[i,] <- summary(lm(y ~ z))$coefficients[2,1:2]
iv1[i,] <- summary(tsls(y ~ d, ~ z))$coefficients[2,1:2]
iv2[i,] <- summary(tsls(y ~ x, ~ z))$coefficients[2,1:2]

}

poverp <- ( mean(fs[,1]) - mean(fsk[,1]) )/mean(fsk[,1])

FS[j,] <- cbind( j, poverp, mean(fs[,1]), mean(fs[,2])+(1+1/n)*var(fs[,1]) )
FSK[j,] <- cbind( j, poverp, mean(fsk[,1]), mean(fsk[,2])+(1+1/n)*var(fsk[,1]) )
RF[j,] <- cbind( j, poverp, mean(ols[,1]), mean(ols[,2])+(1+1/n)*var(ols[,1]) )
IV1[j,] <- cbind( j, poverp, mean(iv1[,1]), mean(iv1[,2])+(1+1/n)*var(iv1[,1]) )
IV2[j,] <- cbind( j, poverp, mean(iv2[,1]), mean(iv2[,2])+(1+1/n)*var(iv2[,1]) )

}

FS; FSK; RF; IV1; IV2



# Figure A2 is produced from this data (transferred to excel) in Stata

